<?php
namespace service\Address;

use artisan\cache;
use service\Address\ParseRules;

class AddressParser
{
    // Values of the geo payload's `level` field that this class knows about.
    const ADDRESS_LEVEL_TOWN = '乡镇';
    const ADDRESS_LEVEL_VILLAGE = '村庄';
    const ADDRESS_LEVEL_POI = '兴趣点';
    const ADDRESS_LEVEL_DOOR = '门牌号';
    const ADDRESS_LEVEL_UNIT_NO = '单元号';
    const ADDRESS_LEVEL_ROAD = '道路';

    // Separator that may appear inside the geo payload's formatted address.
    const FORMATTED_ADDRESS_SEPARATOR = '|';

    // "<road suffix><1-5 digits><号|弄>", used to cut the address after a house number.
    const PATTERN_STREET = '~(?:路|街道|街|巷)\d{1,5}(?:号|弄)~iu';

    // Key of the geo payload inside the record handed to analysis().
    const ARRAY_KEY_GEO = 'geo';

    /**
     * Levels that are parsed; every other level gets the default (empty) result.
     * @var array
     */
    private $supportedAddressLevel = [
        self::ADDRESS_LEVEL_DOOR,
        self::ADDRESS_LEVEL_POI,
        self::ADDRESS_LEVEL_UNIT_NO,
        self::ADDRESS_LEVEL_ROAD,
    ];

    /**
     * Compiled regex per pattern type, filled lazily by getPattern().
     * @var array
     */
    private $patternCache = [];

    /**
     * Parsed `info` payloads indexed by the original address string.
     * @var array
     */
    private $addressCache = [];

    /**
     * The record currently being processed.
     * @var array
     */
    private $currentData;

    /**
     * Waybill number of the current record.
     * @var string|null
     */
    private $currentWaybill;

    /**
     * Working copy of the address; shrinks as street/building parts are consumed.
     * @var string|null
     */
    private $currentAddress;

    /**
     * Parsed street (or POI name when the level is POI).
     * @var string|null
     */
    private $currentStreet;

    /**
     * Parsed building.
     * @var string|null
     */
    private $currentBuilding;

    /**
     * Parsed floor.
     * @var string|null
     */
    private $currentFloor;

    /**
     * The address exactly as received in the geo payload.
     * @var string|null
     */
    private $currentOriginalAddress;

    /**
     * The original address after full-width to half-width conversion.
     * @var string|null
     */
    private $currentProcessedOriginalAddress;

    /**
     * Smallest administrative unit taken from the payload: district, else city.
     * @var string
     */
    private $currentAddressMinReliableUnit;

    /**
     * Formatted address from the geo payload.
     * @var string|null
     */
    private $currentFormattedAddress;

    /**
     * Level from the geo payload.
     * @var string|null
     */
    private $currentAddressLevel;

    /**
     * For door-number level: whether the building may be read from the
     * formatted address. Switched off by calcStreetName().
     * @var bool
     */
    private $parsingBuildingFromFormattedAddress = true;

    /**
     * Whether nothing follows the street/POI name in the working address.
     * @var bool
     */
    private $poiAtEndPosition = false;

    /**
     * Characters stripped from the working address.
     * NOTE(review): the two full-width brackets can never match, because
     * convertFullToHalf() rewrites them to '[' / ']' before this list is applied.
     * @var array
     */
    private $disturbanceString = ['.', ',', '【', '】'];

    /**
     * Parse one record and return it with the geo payload replaced by `info`.
     *
     * Records without a usable geo payload are returned untouched. Parsed
     * results are cached per original address; only the `info` part is cached,
     * so every record keeps its own top-level fields (e.g. the waybill number).
     *
     * @param array $data record carrying the geo payload under ARRAY_KEY_GEO
     * @return array
     */
    public function analysis(array $data)
    {
        $geoKey = static::ARRAY_KEY_GEO;
        if (empty($data[$geoKey]) || !is_array($data[$geoKey])) {
            return $data;
        }

        $this->init($data);
        if ($this->isUnsupportedAddress()) {
            // isUnsupportedAddress() has already formatted currentData.
            return $this->currentData;
        }

        $cacheKey = $this->toStr($this->currentOriginalAddress);
        if (!empty($this->addressCache[$cacheKey])) {
            $result = $this->currentData;
            unset($result[$geoKey]);
            $result['info'] = $this->addressCache[$cacheKey];
            return $result;
        }

        $this->addressPreProcess();
        $parsedInfo = [];
        $parsedInfo['street'] = $this->parseStreet();
        $parsedInfo['building'] = $this->parseBuilding();
        $parsedInfo['floor'] = $this->parseFloor();
        $parsedInfo['poi'] = $this->currentAddressLevel === static::ADDRESS_LEVEL_POI ? $parsedInfo['street'] : '';

        $result = $this->formatReturnData($parsedInfo);
        $this->addressCache[$cacheKey] = $result['info'];

        return $result;
    }

    /**
     * Build the output record: geo payload + parsed fields under `info`,
     * with the geo key removed. Geo values win over $append on key clashes.
     *
     * @param array $append parsed fields to merge in
     * @return array
     */
    private function formatReturnData(array $append)
    {
        $geoKey = static::ARRAY_KEY_GEO;
        $geo = $this->currentData[$geoKey];

        // "lng,lat"; a missing comma (or a comma at offset 0) yields two empty values.
        $location = $this->toStr(isset($geo['location']) ? $geo['location'] : '');
        $coordinates = strpos($location, ',') ? explode(',', $location) : ['', ''];
        $geo['longitude'] = $coordinates[0];
        $geo['latitude'] = $coordinates[1];

        //for ios
        $geo['address'] = $this->formatReturnAddress($this->currentProcessedOriginalAddress);

        if ($this->currentStreet) {
            $unit = $this->currentAddressMinReliableUnit;
            $formatted = $this->getStrFragment($this->currentFormattedAddress, $unit) . $unit . $this->currentStreet;
        } else {
            $formatted = $this->toStr($this->currentFormattedAddress);
        }
        $geo['formatted_address'] = str_replace(static::FORMATTED_ADDRESS_SEPARATOR, '', $formatted);

        $this->currentData['info'] = $geo + $append;
        unset($this->currentData[$geoKey]);

        return $this->currentData;
    }

    /**
     * Reduce an address to the part after the minimum reliable unit (or, when
     * that unit is absent from the address, after the city), then strip
     * punctuation, whitespace and a six-digit code in brackets or at the end.
     *
     * @param mixed $address
     * @return string
     */
    private function formatReturnAddress($address)
    {
        $address = $this->toStr($address);
        $unit = $this->currentAddressMinReliableUnit;

        if ($address && is_string($unit) && $unit) {
            $parts = explode($unit, $address);
            if (count($parts) === 1) {
                $geoKey = static::ARRAY_KEY_GEO;
                $city = isset($this->currentData[$geoKey]['city']) ? $this->currentData[$geoKey]['city'] : null;
                // explode() rejects an empty or non-string delimiter.
                if (is_string($city) && $city !== '') {
                    $parts = explode($city, $parts[0]);
                }
            }
            $address = end($parts);
        }

        // Both the literal two-character sequences (backslash + r/n/t) and the
        // real control characters are removed.
        $noise = [',', '.', ' ', '_', '|', '^', '"', '\r', '\n', '\t', "\r", "\n", "\t", '&nbsp;', '\\'];
        $address = trim(str_replace($noise, '', $address), '/');

        return preg_replace('/\(\d{6}\)|\[\d{6}\]|\{\d{6}\}|<\d{6}>|(?:(?<!\d)\d{6}$)/', '', $address);
    }

    /**
     * Load one record into the per-record state and reset everything derived
     * from the previous record.
     *
     * @param array $data record whose geo payload is a non-empty array
     */
    private function init(array $data)
    {
        $geo = $data[static::ARRAY_KEY_GEO];

        $this->currentData = $data;
        $this->currentWaybill = $this->pick($data, 'waybillNo');
        $this->currentAddressLevel = $this->pick($geo, 'level');
        $this->currentOriginalAddress = $this->pick($geo, 'address');
        $this->currentFormattedAddress = $this->pick($geo, 'formatted_address');
        $this->currentAddressMinReliableUnit = $this->pick($geo, 'district') ?: $this->pick($geo, 'city');

        // Computed here as well so the unsupported-address path formats this
        // record's own address instead of one left over from an earlier call.
        $this->currentProcessedOriginalAddress = $this->convertFullToHalf($this->currentOriginalAddress);

        $this->currentAddress = '';
        $this->currentStreet = $this->currentBuilding = $this->currentFloor = '';
        $this->poiAtEndPosition = false;
        $this->parsingBuildingFromFormattedAddress = true;
    }

    /**
     * Decide whether the current record can be parsed. When it cannot, the
     * record is formatted with empty parsed fields as a side effect.
     *
     * @return array|bool the formatted record when unsupported, false otherwise
     */
    private function isUnsupportedAddress()
    {
        $supported = in_array($this->currentAddressLevel, $this->supportedAddressLevel, true)
            && !empty($this->currentFormattedAddress)
            && !empty($this->currentData[static::ARRAY_KEY_GEO]['location'])
            && !empty($this->currentAddressMinReliableUnit);

        if ($supported) {
            return false;
        }

        return $this->formatReturnData($this->getDefaultParsedAddress());
    }

    /**
     * Empty parsed fields used for records that are not parsed.
     *
     * @return array
     */
    private function getDefaultParsedAddress()
    {
        return [
            'street'    => '',
            'building'  => '',
            'floor'     => '',
            'poi'       => '',
            'longitude' => '',
            'latitude'  => '',
        ];
    }

    /**
     * Normalise the original address into the working address: half-width
     * conversion, Chinese numeral conversion, drop everything up to the
     * minimum reliable unit, strip disturbance characters.
     *
     * @return string the working address
     * @todo remove currentProcessedOriginalAddress
     */
    private function addressPreProcess()
    {
        $this->currentProcessedOriginalAddress = $this->convertFullToHalf($this->currentOriginalAddress);
        $address = $this->convertNumber($this->currentProcessedOriginalAddress);

        $unit = $this->currentAddressMinReliableUnit;
        if (is_string($unit) && $unit !== '') {
            $parts = explode($unit, $address);
            $address = end($parts);
        }

        return $this->currentAddress = str_replace($this->disturbanceString, '', $address);
    }

    /**
     * Determine the street (or POI name) and trim it off the working address.
     *
     * POI: the tail of the formatted address after the minimum reliable unit.
     * Door number: the street derived from the formatted address.
     * Road: that derived street plus whatever the "street" pattern finds in
     * the remaining working address.
     * Any other level: only the pattern match.
     *
     * @return mixed|null|string
     */
    protected function parseStreet()
    {
        $level = $this->currentAddressLevel;

        if ($level === static::ADDRESS_LEVEL_POI) {
            $street = $this->getStrFragment($this->currentFormattedAddress, $this->currentAddressMinReliableUnit, 'end');
            $street = str_replace(static::FORMATTED_ADDRESS_SEPARATOR, '', $street);
            $this->currentAddress = $this->getStrFragment($this->currentAddress, $street, 'end') ?: $this->currentAddress;
            return $this->currentStreet = $street;
        }

        if ($level === static::ADDRESS_LEVEL_DOOR || $level === static::ADDRESS_LEVEL_ROAD) {
            $tail = $this->getStrFragment($this->currentFormattedAddress, $this->currentAddressMinReliableUnit, 'end');
            $this->currentStreet = $this->calcStreetName($tail);
            if ($level === static::ADDRESS_LEVEL_DOOR) {
                return $this->currentStreet;
            }
            // The working address has had its numerals converted, so the
            // street is converted the same way before being used as delimiter.
            $this->currentAddress = $this->getStrFragment($this->currentAddress, $this->convertNumber($this->currentStreet), 'end');
        }

        $street = $this->getStrByPattern($this->currentAddress, $this->getPattern(__FUNCTION__));

        return $this->currentStreet .= $street;
    }

    /**
     * Derive the street name from the separator-delimited tail of the
     * formatted address.
     *
     * Three or more parts: all but the last, joined (a numeric second part is
     * first resolved through calcBuildingName()).
     * Two parts: both joined when they form "<road><number>号" - in which case
     * building parsing from the formatted address is switched off and the
     * working address is cut after the second part - otherwise the first part.
     * One part: the input unchanged.
     *
     * @param string $str
     * @return string
     */
    private function calcStreetName($str)
    {
        $addresses = explode(static::FORMATTED_ADDRESS_SEPARATOR, $str);
        $partCount = count($addresses);

        if ($partCount > 2) {
            if (is_numeric($addresses[1])) {
                $addresses[1] = $this->calcBuildingName($addresses[1]);
            }
            //update currentAddress???
            return implode('', array_slice($addresses, 0, -1));
        }

        if ($partCount === 2) {
            $joined = implode('', $addresses);
            if (!preg_match('#(?:路|街道|大道|街)\d{1,4}号#iu', $joined)) {
                return $addresses[0];
            }
            $this->parsingBuildingFromFormattedAddress = false;
            $this->currentAddress = $this->getStrFragment($this->currentAddress, $addresses[1], 'end');
            return $joined;
        }

        return $str;
    }

    /**
     * Determine the building from the formatted address (door-number level)
     * or from the working address via the level-specific pattern.
     *
     * @return mixed|null|string
     */
    protected function parseBuilding()
    {
        $address = $this->parseBuildingPreProcess();
        $level = $this->currentAddressLevel;

        if ($level === static::ADDRESS_LEVEL_DOOR
            && $this->parsingBuildingFromFormattedAddress
            && strpos($this->currentFormattedAddress, static::FORMATTED_ADDRESS_SEPARATOR) !== false
        ) {
            $candidate = $this->getStrFragment($this->currentFormattedAddress, static::FORMATTED_ADDRESS_SEPARATOR, 'end');
            $this->currentBuilding = $this->calcBuildingName($candidate);
        } elseif ($level === static::ADDRESS_LEVEL_POI && $this->poiAtEndPosition) {
            // Nothing follows the POI name, so there is no building to find.
            $this->currentBuilding = '';
        } elseif ($level === static::ADDRESS_LEVEL_ROAD) {
            $this->currentBuilding = $this->getStrByPattern($address, $this->getPattern('road_building'));
        } else {
            $this->currentBuilding = $this->getStrByPattern($address, $this->getPattern(__FUNCTION__));
        }

        return $this->currentBuilding;
    }

    /**
     * For an alphanumeric candidate, look in the working address for that
     * number followed by a building suffix and return the full match;
     * otherwise return the candidate unchanged.
     *
     * NOTE(review): the candidate is inserted with %d, so only its leading
     * integer value is searched for (e.g. "A12" searches for 0) - confirm
     * whether %s was intended.
     *
     * @param string $possibleStr
     * @return string
     */
    private function calcBuildingName($possibleStr)
    {
        if (!is_string($possibleStr) || !ctype_alnum($possibleStr)) {
            return $possibleStr;
        }

        $pattern = sprintf('~((?<!\d)%d(?:号楼|D|#|栋|H|幢|单元))~iu', $possibleStr);
        if (preg_match($pattern, $this->currentAddress, $match)) {
            return $match[0];
        }

        return $possibleStr;
    }

    /**
     * Narrow the working address to the part that can still hold building
     * information: first cut after a street house number, then (when no such
     * cut happened and a street is known, or the level is POI) cut after the
     * last three characters of the street.
     *
     * @return mixed|null
     * @todo keep under observation
     */
    private function parseBuildingPreProcess()
    {
        $addresses = $this->splitByStreetPattern($this->currentAddress);
        if ($addresses) {
            $this->currentAddress = end($addresses);
        }

        if ((!$addresses && $this->currentStreet) || $this->currentAddressLevel === static::ADDRESS_LEVEL_POI) {
            $street = (string)$this->currentStreet;
            $delimiter = mb_strlen($street, 'UTF-8') > 3 ? mb_substr($street, -3, 3, 'UTF-8') : $street;
            $remainder = $this->getStrFragment($this->currentAddress, $delimiter, 'end');
            if ($remainder) {
                $this->currentAddress = $remainder;
            } else {
                // Nothing follows the street/POI name: keep the working
                // address as it is and remember the fact.
                $this->poiAtEndPosition = true;
            }
        }

        return $this->currentAddress;
    }

    /**
     * Determine the floor from the part of the working address that follows
     * the parsed building.
     *
     * @return string
     */
    protected function parseFloor()
    {
        $address = $this->currentAddress;
        if ($this->currentBuilding) {
            $address = $this->getStrFragment($address, $this->currentBuilding, 'end');
        }

        return $this->currentFloor = $this->getStrByPattern($address, $this->getPattern(__FUNCTION__));
    }

    /**
     * Split a string around PATTERN_STREET matches. Not applied for road level.
     *
     * @param string $str
     * @return array|bool the pieces when at least one match was found, false otherwise
     */
    private function splitByStreetPattern($str)
    {
        if ($this->currentAddressLevel === static::ADDRESS_LEVEL_ROAD || !is_string($str)) {
            return false;
        }

        // preg_split() returns false on a PCRE error (e.g. malformed UTF-8).
        $addresses = preg_split(static::PATTERN_STREET, $str);

        return is_array($addresses) && count($addresses) > 1 ? $addresses : false;
    }

    /**
     * Pick a piece of $str relative to $delimiter.
     *
     * $index: null or 'first' - the piece before the first delimiter;
     * 'all' - the string with every delimiter removed; an integer - that
     * piece, or null when it is missing or empty; anything else (e.g. 'end')
     * - the piece after the last delimiter.
     * When either argument is empty, or the delimiter does not occur, $str is
     * returned unchanged.
     *
     * @param string $str
     * @param string $delimiter
     * @param null|mixed $index
     * @return mixed|null
     */
    private function getStrFragment($str, $delimiter, $index = null)
    {
        if (!is_string($str) || !is_string($delimiter) || !$str || !$delimiter) {
            return $str;
        }
        if (stripos($str, $delimiter) === false) {
            return $str;
        }
        if ($index === 'all') {
            return str_replace($delimiter, '', $str);
        }

        $pieces = explode($delimiter, $str);
        if (is_int($index)) {
            return empty($pieces[$index]) ? null : $pieces[$index];
        }
        if ($index === null || $index === 'first') {
            return reset($pieces);
        }

        return end($pieces);
    }

    /**
     * Return the first non-empty capture group of $pattern in $str.
     *
     * @param string $str
     * @param string $pattern
     * @return string '' when there is no match or no non-empty group
     */
    private function getStrByPattern($str, $pattern)
    {
        if (!$str || !$pattern || !preg_match($pattern, $str, $match)) {
            return '';
        }

        unset($match[0]);
        $captured = array_filter($match);

        return $captured ? (string)reset($captured) : '';
    }

    /**
     * Return the (lazily built and cached) regex for a pattern type. A method
     * name such as "parseStreet" is accepted and mapped to its type.
     *
     * @param $type
     * @return mixed|null
     */
    protected function getPattern($type)
    {
        $type = $this->getPatternType($type);
        if (!$type) {
            return null;
        }
        if (empty($this->patternCache[$type])) {
            $this->patternCache[$type] = $this->buildPattern($type);
        }

        return $this->patternCache[$type];
    }

    /**
     * Join the rules registered for $type into one alternation regex.
     *
     * @param $type
     * @return string '' when no rules exist for the type
     */
    private function buildPattern($type)
    {
        $rules = ParseRules::get($type);
        if (!$rules) {
            return '';
        }

        return '~' . implode('|', $rules) . '~iu';
    }

    /**
     * Lower-case the name and drop every "parse" occurrence
     * ("parseStreet" becomes "street").
     *
     * @param $type
     * @return mixed
     */
    private function getPatternType($type)
    {
        return str_replace('parse', '', strtolower($type));
    }

    /**
     * Replace full-width digits, letters and punctuation with their
     * half-width forms, plus three traditional characters with their
     * simplified forms. Non-scalar input yields ''.
     *
     * @param string $address
     * @return string
     */
    private function convertFullToHalf($address)
    {
        $rules = [
            '０'=>'0', '１'=>'1', '２'=>'2', '３'=>'3', '４'=>'4','５'=>'5', '６'=>'6', '７'=>'7', '８'=>'8', '９'=>'9',
            'Ａ'=>'A', 'Ｂ'=>'B', 'Ｃ'=>'C', 'Ｄ'=>'D', 'Ｅ'=>'E','Ｆ'=>'F', 'Ｇ'=>'G', 'Ｈ'=>'H', 'Ｉ'=>'I', 'Ｊ'=>'J',
            'Ｋ'=>'K', 'Ｌ'=>'L', 'Ｍ'=>'M', 'Ｎ'=>'N', 'Ｏ'=>'O','Ｐ'=>'P', 'Ｑ'=>'Q', 'Ｒ'=>'R', 'Ｓ'=>'S', 'Ｔ'=>'T',
            'Ｕ'=>'U', 'Ｖ'=>'V', 'Ｗ'=>'W', 'Ｘ'=>'X', 'Ｙ'=>'Y','Ｚ'=>'Z', 'ａ'=>'a', 'ｂ'=>'b', 'ｃ'=>'c', 'ｄ'=>'d',
            'ｅ'=>'e', 'ｆ'=>'f', 'ｇ'=>'g', 'ｈ'=>'h', 'ｉ'=>'i','ｊ'=>'j', 'ｋ'=>'k', 'ｌ'=>'l', 'ｍ'=>'m', 'ｎ'=>'n',
            'ｏ'=>'o', 'ｐ'=>'p', 'ｑ'=>'q', 'ｒ'=>'r', 'ｓ'=>'s', 'ｔ'=>'t', 'ｕ'=>'u', 'ｖ'=>'v', 'ｗ'=>'w', 'ｘ'=>'x',
            'ｙ'=>'y', 'ｚ'=>'z', '（'=>'(', '）'=>')', '〔'=>'(', '〕'=>')', '【'=>'[','】'=>']', '〖'=>'[', '〗'=>']',
            '“'=>'"', '”'=>'"', '‘'=>'\'', '＇'=>"'", '｛'=>'{', '｝'=>'}', '《'=>'<','》'=>'>','％'=>'%', '＋'=>'+',
            '—'=>'-', '－'=>'-', '～'=>'~','：'=>':', '。'=>'.', '、'=>',', '，'=>',', '；'=>';', '？'=>'?', '！'=>'!',
            '…'=>'-', '單' => '单', '區' => '区', '棟' => '栋',
        ];

        return strtr($this->toStr($address), $rules);
    }

    /**
     * Convert Chinese numerals to digits ("一" only in the listed
     * floor/building phrases), and turn "零" between digits into 0.
     * With $reverse the map is applied in the opposite direction and the
     * "零" step is skipped. Non-string input is returned unchanged.
     *
     * @param string $str
     * @param bool $reverse
     * @return string
     */
    private function convertNumber($str, $reverse = false)
    {
        if (!is_string($str)) {
            return $str;
        }

        $map = ['〇'=>'0', '一层'=>'1层','一梯'=>'1梯', '一楼'=>'1楼', '一单元' => '1单元', '一号楼' => '1号楼',
                '一幢' => '1幢',  '二'=>'2', '三'=>'3', '四'=>'4', '五'=>'5', '六'=>'6', '七'=>'7', '八'=>'8',
                '九'=>'9',];
        $str = strtr($str, $reverse ? array_flip($map) : $map);

        // A match needs a digit before "零", so offset 0 can be skipped safely.
        if (!$reverse && strpos($str, '零')) {
            $str = preg_replace('/([1-9]\d?)零(\d)/', '${1}0${2}', $str);
        }

        return $str;
    }

    /**
     * Read an array entry without raising a notice for a missing key.
     *
     * @param array $source
     * @param string $key
     * @return mixed|null
     */
    private function pick(array $source, $key)
    {
        return isset($source[$key]) ? $source[$key] : null;
    }

    /**
     * Cast scalars to string; anything else (null, arrays, objects) becomes ''.
     *
     * @param mixed $value
     * @return string
     */
    private function toStr($value)
    {
        return is_scalar($value) ? (string)$value : '';
    }
}
